gganimateOur World in Data (OWID) is a very cool news organization that seeks to produce data driven reporting and knowledge sharing. Their work provides an outstanding template for our course final project. Their reporting is the kind of Communicating WITH Data that this course tries to foster! Some of their most recent reporting on covid can be found here and here, and notice how they are constantly making their data available! We’ll remake this plot from OWID.
library(tidyverse)
library(readxl)
OWID_covid <- read_excel("owid-covid-data.xlsx")
OWID_covid
## # A tibble: 133,082 × 65
## iso_code continent location date total_cases new_cases new_cases_smoot…
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 AFG Asia Afghanistan 2020-0… 5 5 NA
## 2 AFG Asia Afghanistan 2020-0… 5 0 NA
## 3 AFG Asia Afghanistan 2020-0… 5 0 NA
## 4 AFG Asia Afghanistan 2020-0… 5 0 NA
## 5 AFG Asia Afghanistan 2020-0… 5 0 NA
## 6 AFG Asia Afghanistan 2020-0… 5 0 0.714
## 7 AFG Asia Afghanistan 2020-0… 5 0 0.714
## 8 AFG Asia Afghanistan 2020-0… 5 0 0
## 9 AFG Asia Afghanistan 2020-0… 5 0 0
## 10 AFG Asia Afghanistan 2020-0… 5 0 0
## # … with 133,072 more rows, and 58 more variables: total_deaths <dbl>,
## # new_deaths <dbl>, new_deaths_smoothed <dbl>, total_cases_per_million <dbl>,
## # new_cases_per_million <dbl>, new_cases_smoothed_per_million <dbl>,
## # total_deaths_per_million <dbl>, new_deaths_per_million <dbl>,
## # new_deaths_smoothed_per_million <dbl>, reproduction_rate <dbl>,
## # icu_patients <lgl>, icu_patients_per_million <lgl>, hosp_patients <lgl>,
## # hosp_patients_per_million <lgl>, weekly_icu_admissions <lgl>, …
focus <- OWID_covid %>% filter(population > 10000000) %>%
filter(location != 'World') %>%
filter(continent %in% c('Europe', 'North America')) %>%
mutate(per_capita_deaths = total_deaths/population) %>%
dplyr::select(location, per_capita_deaths) %>% #
# https://stackoverflow.com/questions/24237399/how-to-select-the-rows-with-maximum-values-in-each-group-with-dplyr
group_by(location) %>% top_n(n=1) %>%
# https://dplyr.tidyverse.org/reference/arrange.html
arrange(-per_capita_deaths) %>%
# https://dplyr.tidyverse.org/reference/distinct.html
distinct()
focus
## # A tibble: 23 × 2
## # Groups: location [23]
## location per_capita_deaths
## <chr> <dbl>
## 1 Czechia 0.00293
## 2 Romania 0.00276
## 3 United States 0.00229
## 4 Belgium 0.00226
## 5 Mexico 0.00223
## 6 Italy 0.00220
## 7 United Kingdom 0.00210
## 8 Poland 0.00209
## 9 Spain 0.00188
## 10 Ukraine 0.00186
## # … with 13 more rows
# https://www.displayr.com/r-date-conversion/
OWID_covid$date <- as.Date(OWID_covid$date) # change date type.
covid_per_capita_fig <- OWID_covid %>%
filter(location %in% focus$location[1:6]) %>%
# http://www.sthda.com/english/wiki/ggplot2-line-types-how-to-change-line-types-of-a-graph-in-r-software
ggplot(mapping=aes(x=date, y=100000*new_deaths_smoothed/population,
color=location, linetype=continent, group=location)) +
geom_line() + theme_bw() +
# https://stackoverflow.com/questions/52060601/ggplot-multiple-legends-arrangement
theme(legend.position="top", legend.title=element_blank(),
legend.direction="horizontal", legend.box="vertical") +
labs(title="Covid Deaths Each Day Per 100,000 People in Countries", x="Date", y="Mortality rate")
covid_per_capita_fig
# https://www.datanovia.com/en/blog/gganimate-how-to-create-plots-with-beautiful-animation-in-r/
library(gganimate)
covid_per_capita_fig + geom_point() + transition_reveal(date)
# covid_per_capita_fig + geom_point(aes(group = seq_along(date))) + transition_reveal(date)
plotlyContinuing to use the Our World in Data (OWID) covid data, we will now add in data on covid testing.
NA).# print columns whose names contain "test".
# covid testing data is actually just NA's in this file!
OWID_covid[str_detect(colnames(OWID_covid), "test")]
## # A tibble: 133,082 × 8
## new_tests total_tests total_tests_per_th… new_tests_per_tho… new_tests_smoot…
## <lgl> <lgl> <lgl> <lgl> <lgl>
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## 7 NA NA NA NA NA
## 8 NA NA NA NA NA
## 9 NA NA NA NA NA
## 10 NA NA NA NA NA
## # … with 133,072 more rows, and 3 more variables:
## # new_tests_smoothed_per_thousand <lgl>, tests_per_case <lgl>,
## # tests_units <lgl>
OWID_covid_tests <- read_csv("full-list-total-tests-for-covid-19.csv")
OWID_covid_tests <- OWID_covid_tests %>%
rename(location=Entity, date=Day) %>%
left_join(OWID_covid, by=c("location","date"))
OWID_covid_tests
## # A tibble: 57,467 × 68
## location Code date total_tests.x `142601-annotatio… iso_code continent
## <chr> <chr> <date> <dbl> <chr> <chr> <chr>
## 1 Albania ALB 2020-02-25 8 tests performed ALB Europe
## 2 Albania ALB 2020-02-26 13 tests performed ALB Europe
## 3 Albania ALB 2020-02-27 17 tests performed ALB Europe
## 4 Albania ALB 2020-02-28 18 tests performed ALB Europe
## 5 Albania ALB 2020-02-29 26 tests performed ALB Europe
## 6 Albania ALB 2020-03-01 29 tests performed ALB Europe
## 7 Albania ALB 2020-03-02 31 tests performed ALB Europe
## 8 Albania ALB 2020-03-03 36 tests performed ALB Europe
## 9 Albania ALB 2020-03-04 42 tests performed ALB Europe
## 10 Albania ALB 2020-03-05 50 tests performed ALB Europe
## # … with 57,457 more rows, and 61 more variables: total_cases <dbl>,
## # new_cases <dbl>, new_cases_smoothed <dbl>, total_deaths <dbl>,
## # new_deaths <dbl>, new_deaths_smoothed <dbl>, total_cases_per_million <dbl>,
## # new_cases_per_million <dbl>, new_cases_smoothed_per_million <dbl>,
## # total_deaths_per_million <dbl>, new_deaths_per_million <dbl>,
## # new_deaths_smoothed_per_million <dbl>, reproduction_rate <dbl>,
## # icu_patients <lgl>, icu_patients_per_million <lgl>, hosp_patients <lgl>, …
per capita deaths (i.e., total deaths / population) and per capita tests (i.e., total tests / population).frames feature of plotly that we’ll animate our plot with needs each country to have data at every date being animated
NA’s or the frames rendering system won’t be able to maintain the identity of the points and will track them incorrectlycomplete function, and then forward and back filling all the NA’s within each country the tidy group_by, fill, and ungroup functions.frames feature currently doesn’t animate dates, so we’ll filter our data after 2021-09-01 and enumerate the days starting at 2021-09-01 to present.OWID_covid_tests2 <- OWID_covid_tests %>%
select(location, total_tests.x, total_deaths, continent, population, date) %>%
mutate(per_capita_deaths = total_deaths/population,
per_capita_tests = total_tests.x/population) %>%
# https://stackoverflow.com/questions/48633460/r-fill-missing-dates-by-group
complete(location, date) %>%
group_by(location) %>%
fill(continent, .direction="down") %>%
fill(continent, .direction="up") %>%
# https://stackoverflow.com/questions/43332417/how-to-use-fill-by-function-with-na-approx-linear-interpolation-inside-dpl
mutate(per_capita_deaths=zoo::na.approx(per_capita_deaths, na.rm=FALSE)) %>%
mutate(per_capita_tests=zoo::na.approx(per_capita_tests,na.rm=FALSE)) %>%
fill(per_capita_deaths, per_capita_tests, population, .direction="down") %>%
fill(per_capita_deaths, per_capita_tests, population, .direction="up") %>%
ungroup(location) %>%
# https://www.displayr.com/r-date-conversion/
filter(date >= as.Date("2021-09-01")) %>%
mutate(DaysSince2021_09_01 = date -as.Date("2021-09-01"))
OWID_covid_tests2
## # A tibble: 9,782 × 9
## location date total_tests.x total_deaths continent population
## <chr> <date> <dbl> <dbl> <chr> <dbl>
## 1 Albania 2021-09-01 NA NA Europe 2872934
## 2 Albania 2021-09-02 NA NA Europe 2872934
## 3 Albania 2021-09-03 NA NA Europe 2872934
## 4 Albania 2021-09-04 NA NA Europe 2872934
## 5 Albania 2021-09-05 NA NA Europe 2872934
## 6 Albania 2021-09-06 NA NA Europe 2872934
## 7 Albania 2021-09-07 NA NA Europe 2872934
## 8 Albania 2021-09-08 NA NA Europe 2872934
## 9 Albania 2021-09-09 NA NA Europe 2872934
## 10 Albania 2021-09-10 NA NA Europe 2872934
## # … with 9,772 more rows, and 3 more variables: per_capita_deaths <dbl>,
## # per_capita_tests <dbl>, DaysSince2021_09_01 <drtn>
The frames feature of plotly makes animating a scatter (or bubble) plot super easy!
library(plotly)
fig <- OWID_covid_tests2 %>% rename(`Days Since Sep 1, 2021`=DaysSince2021_09_01) %>%
plot_ly(
y = ~per_capita_deaths,
x = ~per_capita_tests,
size = ~population,
# https://stackoverflow.com/questions/38400343/r-plotly-smaller-markers-in-bubble-plot
marker = list(sizeref=0.03),
color = ~continent,
# https://community.plotly.com/t/making-persistant-selections-in-plotlys-legend-for-animations/5640
# https://plotly.com/r/legend/#grouped-legend
frame = ~`Days Since Sep 1, 2021`,
text = ~location,
hoverinfo = 'text',
type = 'scatter',
mode = 'markers'
)
# change x- and y- labels
fig %>% layout(xaxis=list(title='Per Capita Tests'),
yaxis=list(title='Per Capita Deaths'))
# change the scale. https://plotly.com/r/axes/
fig %>% layout(xaxis=list(type = "log", title="Per Capita Tests"),
yaxis=list(type = "log", title="Per Capita Deaths"))